using
CSV,
Dates,
DataFrames,
Markdown,
Plots,
Statistics,
StatsPlots
gr()
;
# Locations of the two global time-series CSV files, relative to the working directory.
death_path = "COVID-19/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv"
confirmed_path = "COVID-19/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv"
;
# Load both files the same way (deaths first, then confirmed) and shorten the two
# location column names to `:state` and `:country`.
death, confirmed = map((death_path, confirmed_path)) do path
    table = DataFrame!(CSV.read(path))
    rename!(table, Dict(Symbol("Province/State") => :state, Symbol("Country/Region") => :country))
end
;
# Render a string cell verbatim, so quotes, `$` and backslashes in the data survive.
_markdown_cell(x::AbstractString) = x
# Render any other value through `repr`, dropping the double quotes `repr` may add.
_markdown_cell(x) = replace(repr(x), "\"" => "")

"""
    markdown_table(cols, names; df=false)

Convert a set of columns and their headers into the text of a markdown table.

# Arguments
- `cols`: a collection of columns (each an indexable collection of cell values),
  or a `DataFrame` when `df=true`. Row count is taken from the first column.
- `names`: the column headers. For a data frame pass `string.(names(df))`.

# Keywords
- `df`: set to `true` when `cols` is a data frame rather than a collection of columns.

# Returns
A `String` holding one header line, one `|---` separator line and one line per row.
Every line starts with `|` and ends with a newline. With no columns, only the header
and separator lines are produced.

String cells and headers are written exactly as given; other values are written using
their `repr` with double quotes removed.
"""
function markdown_table(cols, names; df=false)
    columns = df ? [col for col in eachcol(cols)] : cols
    rendered = [_markdown_cell.(col) for col in columns]
    # An empty column list means an empty table body rather than an indexing error.
    nrows = isempty(rendered) ? 0 : length(first(rendered))

    io = IOBuffer()
    for name in names
        print(io, "|", name)
    end
    print(io, "\n")
    for _ in names
        print(io, "|", "---")
    end
    print(io, "\n")
    for row in 1:nrows
        for column in rendered
            print(io, "|", column[row])
        end
        print(io, "\n")
    end
    return String(take!(io))
end
;
# First differences of `values`, padded with a leading zero so the length is unchanged.
function _daily_change(values)
    change = zeros(Int, length(values))
    change[2:end] = diff(values)
    return change
end

"""
    country_data(country; state=false, start_date=Dates.Date(2020, 1, 22), end_date="max")

Build the daily time series for `country` from the global `death` and `confirmed`
tables, summing all matching rows of each table per date column.

# Keywords
- `state`: a province/state name restricting the rows; any non-string value (the
  default `false`) selects every row of the country.
- `start_date`, `end_date`: inclusive date bounds of the returned rows. The string
  `"max"` for `end_date` means the latest date available.

# Returns
A `DataFrame` with columns `date`, `country` (`"country-state"` when a state is
given), `confirmed`, `deaths`, `death_rate`, `new_cases`, `new_deaths`,
`acceleration_cases`, `acceleration_deaths`, `days_since_100` (running count of days
with more than 100 confirmed cases) and `days_since_10` (running count of days with
more than 10 deaths).
"""
function country_data(country; state=false, start_date=Dates.Date(2020, 1, 22), end_date="max")
    has_state = state isa AbstractString
    # Sum each date column over the selected rows.
    column_sums(rows) = [sum(col) for col in eachcol(rows)]
    # `isequal` maps a missing province to `false`; `==` would leave `missing` in the
    # mask and make the row selection fail for countries that have province-less rows.
    function row_mask(table)
        in_country = table.country .== country
        return has_state ? in_country .& isequal.(table.state, state) : in_country
    end

    # Columns 5:end are the per-date counts; the columns before them are not counts.
    d = column_sums(death[row_mask(death), 5:end])
    c = column_sums(confirmed[row_mask(confirmed), 5:end])

    # The first date column is taken to be 2020-01-22 (the first day of the JHU series),
    # so offsets start at zero.
    first_day = Dates.Date(2020, 1, 22)
    dates = [first_day + Dates.Day(offset) for offset in 0:length(d)-1]

    df = DataFrame(date=dates)
    country_name = has_state ? country * "-" * state : country
    df[!, :country] = fill(country_name, length(dates))
    df[!, :confirmed] = c
    df[!, :deaths] = d
    df[!, :death_rate] = d ./ c
    df[!, :new_cases] = _daily_change(c)
    df[!, :new_deaths] = _daily_change(d)
    df[!, :acceleration_cases] = _daily_change(df.new_cases)
    df[!, :acceleration_deaths] = _daily_change(df.new_deaths)
    # Running number of days on which the threshold has been exceeded.
    df[!, :days_since_100] = cumsum(c .> 100)
    df[!, :days_since_10] = cumsum(d .> 10)

    last_date = end_date == "max" ? maximum(df.date) : end_date
    return df[(df.date .>= start_date) .& (df.date .<= last_date), :]
end
;
"""
    plot_country(country; state=false, metric=:confirmed, start_date=Dates.Date(2020, 1, 22), end_date="max", days_since_100=false)

Start a new line plot of `metric` for `country`, using the table returned by
`country_data`.

With `days_since_100=true` the x-axis is the `days_since_100` column when `metric` is
`:confirmed` and the `days_since_10` column for any other metric, and only rows where
that counter is positive are drawn. Otherwise the x-axis is the date. A black
horizontal line at zero is added and the y-axis uses plain number formatting.
"""
function plot_country(country; state=false, metric=:confirmed, start_date=Dates.Date(2020, 1, 22), end_date="max", days_since_100=false)
    data = country_data(country; state=state, start_date=start_date, end_date=end_date)
    if !days_since_100
        plot(data.date, data[!, metric], label=country, legend=:outertopright, size=(1000, 500))
    else
        # Confirmed cases are aligned on the case counter, everything else on the death counter.
        axis_column = metric == :confirmed ? :days_since_100 : :days_since_10
        started = data[!, axis_column] .> 0
        plot(data[started, axis_column], data[started, metric], label=country, legend=:outertopright, size=(1000, 500))
    end
    plot!([0], linetype=:hline, color=:black, label="")
    return plot!(yformatter=:plain)
end
"""
    plot_country!(country; state=false, metric=:confirmed, start_date=Dates.Date(2020, 1, 22), end_date="max", days_since_100=false)

Add a line for `country` to the current plot, using the table returned by
`country_data`.

With `days_since_100=true` the x-axis is the `days_since_100` column when `metric` is
`:confirmed` and the `days_since_10` column for any other metric, and only rows where
that counter is positive are drawn. Otherwise the x-axis is the date. The y-axis uses
plain number formatting.
"""
function plot_country!(country; state=false, metric=:confirmed, start_date=Dates.Date(2020, 1, 22), end_date="max", days_since_100=false)
    data = country_data(country; state=state, start_date=start_date, end_date=end_date)
    if !days_since_100
        plot!(data.date, data[!, metric], label=country, legend=:outertopright, size=(1000, 500))
    else
        # Confirmed cases are aligned on the case counter, everything else on the death counter.
        axis_column = metric == :confirmed ? :days_since_100 : :days_since_10
        started = data[!, axis_column] .> 0
        plot!(data[started, axis_column], data[started, metric], label=country, legend=:outertopright, size=(1000, 500))
    end
    return plot!(yformatter=:plain)
end
"""
    bar_plot(country, metric)

Draw a bar chart of the `metric` column against date for `country`, reading from the
global `all_country_data` table and keeping rows dated 2020-01-01 or later. The chart
has no legend, is titled with the country name and uses plain y-axis number formatting.
"""
function bar_plot(country, metric)
    selected = (all_country_data.country .== country) .&
        (all_country_data.date .>= Dates.Date(2020, 1, 1))
    return bar(
        all_country_data[selected, :date],
        all_country_data[selected, metric],
        legend=false,
        linecolor=1,
        title=country,
        yformatter=:plain,
    )
end
"""
    accel_plot(country_name="World")

Build a 3x2 grid of plots for `country_name` from the global `all_country_data` table:
cumulative confirmed cases and deaths, their day-to-day change, and the change of that
change. The two derivative rows are smoothed with a 3-day moving average and carry a
black horizontal line at zero.

Passing `"World"` uses every row of the table; any other name keeps only the rows of
that country. Counts are summed per date before plotting.
"""
function accel_plot(country_name="World")
    if country_name != "World"
        tmp = all_country_data[all_country_data.country .== country_name, :]
    else
        tmp = all_country_data
    end
    agg = by(tmp, [:date], :confirmed => sum, :deaths => sum)

    conf = plot(agg.date, agg.confirmed_sum, legend=false, title="$country_name Confirmed Cases", size=(1400, 700), color=:grey, yformatter=:plain)
    death_plot = plot(agg.date, agg.deaths_sum, legend=false, title="$country_name Deaths", size=(1400, 700), color=:orange, yformatter=:plain)

    # Day-to-day difference with a zero in the first slot, so lengths stay aligned with the dates.
    function lagged_diff(series)
        out = zeros(Int, length(series))
        out[2:end] = series[2:end] - series[1:end-1]
        return out
    end
    new_cases = lagged_diff(agg.confirmed_sum)
    new_deaths = lagged_diff(agg.deaths_sum)
    conf_acceleration = lagged_diff(new_cases)
    death_acceleration = lagged_diff(new_deaths)

    # Trailing moving average: each value averages a window of `width` consecutive entries.
    window_mean(values, width) = [sum(@view values[first:(first+width-1)])/width for first in 1:(length(values)-(width-1))]
    n_days = 3
    # The first full window ends on entry `n_days`, so the averaged series starts there.
    ma_date = agg.date[n_days:end]

    # One smoothed derivative panel with its zero reference line.
    function derivative_panel(values, title, color)
        panel = plot(ma_date, window_mean(values, n_days), legend=false, title=title, size=(1400, 700), color=color)
        plot!([0], linetype=:hline, color=:black, label="", yformatter=:plain)
        return panel
    end
    p_conf_slope = derivative_panel(new_cases, "$country_name 1st Derivative (new cases)", :grey)
    p_death_slope = derivative_panel(new_deaths, "$country_name 1st Derivative (new deaths)", :orange)
    p_conf_acc = derivative_panel(conf_acceleration, "$country_name 2nd Derivative (cases acceleration)", :grey)
    p_death_acc = derivative_panel(death_acceleration, "$country_name 2nd Derivative (deaths acceleration)", :orange)

    return plot(conf, death_plot, p_conf_slope, p_death_slope, p_conf_acc, p_death_acc, layout=(3,2))
end
;
# Every country present in the deaths table, de-duplicated and sorted.
all_countries = sort(unique(death.country))
# One long table: the full series of each country stacked on top of each other.
all_country_data = vcat(map(country_data, all_countries)...)
# Per-country maximum of the confirmed and death counts, plus the resulting death rate.
current_state = by(all_country_data, [:country], :confirmed => maximum, :deaths => maximum)
current_state[!, :death_rate] = current_state.deaths_maximum ./ current_state.confirmed_maximum
;
# Report header: the title carries the latest date found in `all_country_data`,
# followed by the data attribution.
Markdown.parse("""
# Overview of COVID-19 as of $(maximum(all_country_data.date))
This page uses data from https://github.com/CSSEGISandData/COVID-19 (Johns Hopkins CSSE is the original source) to create a report on the status of COVID-19 cases and deaths around the world. It is updated once a day around 10:00am CET.
""")
# Worldwide totals: sums of the per-country maxima held in `current_state`.
tmp = current_state
Markdown.parse("""
- Confirmed cases: $(sum(tmp.confirmed_maximum))
- Deaths: $(sum(tmp.deaths_maximum))
- Death Rate: $(round(sum(tmp.deaths_maximum) / sum(tmp.confirmed_maximum) * 100, digits=2))%
""")
# Worldwide six-panel plot; the follow-up `plot!` resizes the figure just produced.
accel_plot("World")
plot!(size=(900, 700))
# The ten rows of `current_state` with the highest confirmed maximum.
top_10 = sort(current_state, order(:confirmed_maximum, rev=true))[1:10, :]
# Replace the numeric death rate with a percentage string rounded to two decimals.
top_10[!, :death_rate] = string.(round.(top_10.death_rate * 100, digits=2)) .* "%"
Markdown.parse(
markdown_table(top_10, ["Country", "Confirmed Cases", "Deaths", "Death Rate"]; df=true)
)
The "Acceleration of Last 5 Days" column is the average second derivative of confirmed cases over the last 5 days, divided by the number of confirmed cases 5 days ago. It has little intrinsic meaning on its own; it is a relative measure that makes it easier to compare between countries how fast new cases are accelerating.
# Build the "last 5 days" comparison table: for every country, the mean second
# derivative of confirmed cases over the last five rows, relative to the confirmed
# count five days before the latest row.
# Assumes every country has at least six daily rows in `all_country_data`.
countries = unique(all_country_data.country)
acceleration = Float64[]
cases_5_ago = eltype(all_country_data.confirmed)[]
cases_now = eltype(all_country_data.confirmed)[]
for country in countries
    # Select this country's rows once instead of re-filtering for every value read.
    rows = all_country_data[all_country_data.country .== country, :]
    # With `end` being the latest day, five days ago is `end-5` (not `end-4`).
    baseline = rows.confirmed[end-5]
    relative_accel = mean(rows.acceleration_cases[end-4:end]) / baseline
    # A zero baseline gives NaN (0/0) or ±Inf (x/0); report both as no acceleration.
    push!(acceleration, isfinite(relative_accel) ? relative_accel : 0.0)
    push!(cases_5_ago, baseline)
    push!(cases_now, rows.confirmed[end])
end
last_5 = DataFrame(country=countries, last_5_accel=acceleration, cases_now=cases_now, cases_5_ago=cases_5_ago)
last_5[!, :perc_increase] = last_5.cases_now ./ last_5.cases_5_ago .- 1
# Keep only countries with a meaningful case count, fastest acceleration first.
tmp = sort(last_5[(last_5.cases_5_ago .> 20) .& (last_5.cases_now .> 1000), :], order(:last_5_accel, rev=true))
# Turn the two ratios into percentage strings and give the columns display names.
tmp[!, :perc_increase] = string.(round.(tmp.perc_increase .* 100, digits=2)) .* "%"
rename!(tmp, [:Country, Symbol("Acceleration of Last 5 Days"), Symbol("Cases Now"), Symbol("Cases 5 Days Ago"), Symbol("% Increase in 5 Days")])
tmp[!, Symbol("Acceleration of Last 5 Days")] = string.(round.(tmp[!, Symbol("Acceleration of Last 5 Days")] * 100, digits=2)) .* "%"
Markdown.parse(
    markdown_table(tmp, string.(names(tmp)), df=true)
)
"""
    individual_country(country; comparison_country="Italy")

Assemble the full figure for one country: the `accel_plot` grid on top, then the
"days since" line plots of confirmed cases and deaths drawn together with
`comparison_country`, then bar charts of new cases and new deaths per day.

Returns the combined plot, sized 900x1400.
"""
function individual_country(country; comparison_country="Italy")
    # Line plot of `metric` for both countries on the "days since" axis, with a title.
    function since_panel(metric, title)
        panel = plot_country(country, days_since_100=true, metric=metric)
        plot_country!(comparison_country, days_since_100=true, metric=metric)
        plot!(title=title, titlefont=font(10))
        return panel
    end
    # Bar chart of `metric` for `country`, with a title.
    function titled_bars(metric, title)
        panel = bar_plot(country, metric)
        plot!(title=title, titlefont=font(10))
        return panel
    end

    # line plots
    cases_100 = since_panel(:confirmed, "$country Days Since 100th Case")
    deaths_10 = since_panel(:deaths, "$country Days Since 10th Death")
    line_plots = plot(cases_100, deaths_10, layout=(2,1))

    # acceleration plots; the `plot!` adjusts the grid that was just created
    accel_plots = accel_plot(country)
    plot!(titlefont=font(10))

    # bar plots
    cases_bar = titled_bars(:new_cases, "$country New Cases Per Day")
    deaths_bar = titled_bars(:new_deaths, "$country Deaths Per Day")
    bar_plots = plot(cases_bar, deaths_bar, layout=(2,1))

    final_plot = plot(accel_plots, line_plots, bar_plots, layout=(3, 1))
    plot!(size=(900, 1400))
    return final_plot
end
;
Days since the 100th case and the 10th death are plotted with Italy for comparison. Graphs are shown only for countries with at least 500 confirmed cases. Derivative plots use a 3-day moving average.
# Per-country section: a heading, the headline numbers, and the full figure for
# countries that reached 500 confirmed cases (Hungary is always shown).
for coun in all_countries
    data = country_data(coun)
    peak_confirmed = maximum(data.confirmed)
    peak_deaths = maximum(data.deaths)
    display(Markdown.parse("## $coun"))
    display(Markdown.parse("""
    - Confirmed cases: $(peak_confirmed)
    - Deaths: $(peak_deaths)
    - Death Rate: $(round(peak_deaths / peak_confirmed * 100, digits=2))%
    """))
    show_figure = peak_confirmed >= 500 || coun == "Hungary"
    if show_figure
        display(individual_country(coun))
    end
end